# Set options for report generation
knitr::opts_chunk$set(
fig.width = 6,
fig.asp = 0.8,
out.width = "80%"
)
# Deactivate scientific notation
options(scipen=999)
# Load packages
pacman::p_load(
rlang,
an9elproject,
an9elversions,
magrittr,
tidyverse,
lubridate,
naniar,
mice,
survival,
survminer,
gridExtra,
# ggstance,
ggthemes,
ggpval,
ggstatsplot,
# GGally,
install = FALSE, update = FALSE
)
# source("http://peterhaschke.com/Code/multiplot.R") #load multiplot function
# Custom functions
# Run Shapiro-Wilks test on multiple variables
shapiro_test_summary <- function(data, columns) {
results <- list()
for (col in columns) {
test_result <- shapiro.test(data[[col]])
is_normal <- ifelse(test_result$p.value > 0.05, "Normally distributed", "Not normally distributed")
results[[col]] <- list(
p_value = test_result$p.value,
normality = is_normal
)
}
return(results)
}
# Run Kolmogorov-Smirnov test on multiple variables
lillie_test_summary <- function(data, columns) {
results <- list()
for (col in columns) {
test_result <- nortest::lillie.test(data[[col]])
is_normal <- ifelse(test_result$p.value > 0.05, "Normally distributed", "Not normally distributed")
results[[col]] <- list(
p_value = test_result$p.value,
normality = is_normal
)
}
return(results)
}
# Stacked density plot and boxplot
stacked_density_plot_and_boxplot = function(data_df, variable_to_plot, plot_title) {
outplot = ggplot(data = data_df,
aes(x = {{variable_to_plot}},
y = -1)) +
geom_boxplot(outlier.colour = "red") +
stat_boxplot(geom = "errorbar") +
geom_density(aes(x = {{variable_to_plot}},
y = stat(scaled)),
fill = "lightgrey",
inherit.aes = FALSE) +
#ylim(range(!!variable_to_plot, na.rm = TRUE)) +
labs(title = plot_title,
x = "",
y = "") +
theme_hc(base_size = 8) +
theme(#axis.line.y = element_blank(),
#axis.title.x = element_blank(),
#axis.text.x = element_blank(),
#axis.ticks.x = element_blank(),
plot.title = element_text(hjust = 0.5),
aspect.ratio = 3/5) +
coord_flip()
return(outplot)
}
# Fancy barplot showing percentages
fancy_barplot_percentage = function(dataframe, xcolname) {
outplot = ggplot(dataframe,
aes(x = {{xcolname}}, #
fill = {{xcolname}})) + #
geom_bar(aes(y = (..count..)/sum(..count..)), colour = "black") +
geom_text(aes(label = scales::percent((..count..)/sum(..count..)),
y = (..count..)/sum(..count..)),
stat = "count",
vjust = -0.5) +
theme_bw() +
theme(legend.position = "none",
axis.text.x = element_text(colour = "black", size = 10),
axis.text.y = element_text(colour = "black")) +
scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
# xlab(
# str_to_title(
# str_replace_all(
# substitute(xcolname), pattern = "_", replacement = " "))) +
ylab("%") +
scale_fill_brewer(palette = "Set3")
return(outplot)
}
# Fancy barplot for categorical variable with controls vs cases
fancy_barplot_ctrls_vs_cs = function (dataframe, xcolname) {
outplot = ggplot(dataframe, aes(x = {{xcolname}}, fill = patient_group)) +
geom_bar(aes(y = (..count..)/sum(..count..)),
colour = "black",
position = "dodge") +
theme_bw() +
labs(fill = "Experimental group") +
theme(axis.text.x = element_text(colour = "black", size = 10),
axis.text.y = element_text(colour = "black")) +
scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
xlab(
str_to_title(
str_replace_all(
substitute(xcolname), pattern = "_", replacement = " "))) +
ylab("%") +
scale_fill_brewer(palette = "Set3")
return(outplot)
}
# Load database
oncoth1 = get_project("oncothr1", version = "0.0.8003")
## ! Your oncothr1 project is version: 0.0.8003
## ! Last oncothr1 version online is: 0.0.8005
## ℹ Please, update an9elversions
# load("/mnt/ir-bioinf02/home/blobato/oncothromb01/oncothr1_versions/oncothr1.RData_0.0.8003")
# oncoth1 = obj
# rm(obj)
# Get data slot
oncoth1_data = oncoth1$data
# Transform some variables for better readability
oncoth1_data = oncoth1_data %>%
mutate(khorana_risk_score_three_categories = case_when(
khorana_risk_score == 0 ~ "Low risk",
khorana_risk_score == 1 ~ "Intermediate risk",
khorana_risk_score == 2 ~ "Intermediate risk",
khorana_risk_score == 3 ~ "High risk",
khorana_risk_score == 4 ~ "High risk",
khorana_risk_score == 5 ~ "High risk",
khorana_risk_score == NA ~ NA_character_), .after = khorana_risk_score) %>%
mutate(khorana_risk_score_three_categories = factor(
x = khorana_risk_score_three_categories,
levels = c("Low risk", "Intermediate risk", "High risk"),
labels = c("Low risk", "Intermediate risk", "High risk")
))
# Select variables
vars2select = c(
"patient_group",
"age_when_cancer_dx",
"gender",
"ethnicity",
"bmi_category",
"performance_status_category_corrected",
"n_comorbidities",
"arterial_hypertension",
"chronic_cardiac_insufficiency",
"diabetes_mellitus",
"dyslipidemia",
"tobacco_use",
"copd",
"renal_insufficiency",
"venous_insufficiency",
"hormone_therapy",
"antiaggreg_tx_currently",
"previous_vte",
"previous_ate",
"family_background_vte",
"khorana_risk_score_three_categories",
"primary_tumor",
"tnm_stage_grouped",
"grade_histological_differentiation",
"tumor_surgically_removed",
"catheter_device",
"major_surgery_in_medical_history",
"n_times_recent_major_surgery_all_study",
"n_times_major_trauma_all_study",
"n_times_blood_transfusion_all_study",
"n_times_immobilisation_all_study",
"n_times_erythropoietin_tx_all_study"
)
oncoth1_subset = oncoth1_data %>%
select(all_of(vars2select)) %>%
mutate(across(c(
n_times_recent_major_surgery_all_study,
n_times_major_trauma_all_study,
n_times_blood_transfusion_all_study,
n_times_immobilisation_all_study,
n_times_erythropoietin_tx_all_study), ~ as.factor(.x)))
# Make summary table
oncoth1_st = arsenal::tableby(patient_group ~., data = oncoth1_subset)
summary(oncoth1_st)
##
##
## | | Case (N=88) | Control (N=302) | Total (N=390) | p value|
## |:-------------------------------------------|:---------------:|:---------------:|:---------------:|-------:|
## |**age_when_cancer_dx** | | | | 0.963|
## | Mean (SD) | 64.284 (11.226) | 64.225 (10.328) | 64.238 (10.522) | |
## | Range | 42.000 - 86.000 | 33.000 - 93.000 | 33.000 - 93.000 | |
## |**gender** | | | | 0.224|
## | Male | 53 (60.2%) | 203 (67.2%) | 256 (65.6%) | |
## | Female | 35 (39.8%) | 99 (32.8%) | 134 (34.4%) | |
## |**ethnicity** | | | | 0.675|
## | Caucasian | 87 (98.9%) | 291 (96.4%) | 378 (96.9%) | |
## | Latin-American | 1 (1.1%) | 9 (3.0%) | 10 (2.6%) | |
## | Black/African American | 0 (0.0%) | 1 (0.3%) | 1 (0.3%) | |
## | Sephardic Jew | 0 (0.0%) | 1 (0.3%) | 1 (0.3%) | |
## |**bmi_category** | | | | 0.986|
## | Underweight | 3 (3.4%) | 11 (3.6%) | 14 (3.6%) | |
## | Normal | 43 (48.9%) | 152 (50.3%) | 195 (50.0%) | |
## | Overweight | 30 (34.1%) | 102 (33.8%) | 132 (33.8%) | |
## | Obese | 12 (13.6%) | 37 (12.3%) | 49 (12.6%) | |
## |**performance_status_category_corrected** | | | | 0.046|
## | N-Miss | 1 | 7 | 8 | |
## | 0 | 24 (27.6%) | 121 (41.0%) | 145 (38.0%) | |
## | 1 | 52 (59.8%) | 152 (51.5%) | 204 (53.4%) | |
## | 2 | 11 (12.6%) | 22 (7.5%) | 33 (8.6%) | |
## |**n_comorbidities** | | | | 0.992|
## | 0 | 28 (31.8%) | 94 (31.1%) | 122 (31.3%) | |
## | 1 | 27 (30.7%) | 94 (31.1%) | 121 (31.0%) | |
## | 2+ | 33 (37.5%) | 114 (37.7%) | 147 (37.7%) | |
## |**arterial_hypertension** | | | | 0.993|
## | No | 49 (55.7%) | 168 (55.6%) | 217 (55.6%) | |
## | Yes | 39 (44.3%) | 134 (44.4%) | 173 (44.4%) | |
## |**chronic_cardiac_insufficiency** | | | | 0.336|
## | No | 87 (98.9%) | 293 (97.0%) | 380 (97.4%) | |
## | Yes | 1 (1.1%) | 9 (3.0%) | 10 (2.6%) | |
## |**diabetes_mellitus** | | | | 0.253|
## | No | 75 (85.2%) | 241 (79.8%) | 316 (81.0%) | |
## | Yes | 13 (14.8%) | 61 (20.2%) | 74 (19.0%) | |
## |**dyslipidemia** | | | | 0.096|
## | No | 51 (58.0%) | 204 (67.5%) | 255 (65.4%) | |
## | Yes | 37 (42.0%) | 98 (32.5%) | 135 (34.6%) | |
## |**tobacco_use** | | | | 0.184|
## | N-Miss | 0 | 4 | 4 | |
## | Never has smoked | 28 (31.8%) | 87 (29.2%) | 115 (29.8%) | |
## | Former smoker | 35 (39.8%) | 149 (50.0%) | 184 (47.7%) | |
## | Active smoker | 25 (28.4%) | 62 (20.8%) | 87 (22.5%) | |
## |**copd** | | | | 0.139|
## | N-Miss | 0 | 1 | 1 | |
## | No | 80 (90.9%) | 255 (84.7%) | 335 (86.1%) | |
## | Yes | 8 (9.1%) | 46 (15.3%) | 54 (13.9%) | |
## |**renal_insufficiency** | | | | 0.588|
## | No | 83 (94.3%) | 289 (95.7%) | 372 (95.4%) | |
## | Yes | 5 (5.7%) | 13 (4.3%) | 18 (4.6%) | |
## |**venous_insufficiency** | | | | 0.493|
## | N-Miss | 0 | 1 | 1 | |
## | No | 79 (89.8%) | 262 (87.0%) | 341 (87.7%) | |
## | Yes | 9 (10.2%) | 39 (13.0%) | 48 (12.3%) | |
## |**hormone_therapy** | | | | 0.890|
## | No | 87 (98.9%) | 298 (98.7%) | 385 (98.7%) | |
## | Yes | 1 (1.1%) | 4 (1.3%) | 5 (1.3%) | |
## |**antiaggreg_tx_currently** | | | | 0.063|
## | No | 80 (90.9%) | 250 (82.8%) | 330 (84.6%) | |
## | Yes | 8 (9.1%) | 52 (17.2%) | 60 (15.4%) | |
## |**previous_vte** | | | | 0.021|
## | No | 82 (93.2%) | 296 (98.0%) | 378 (96.9%) | |
## | Yes | 6 (6.8%) | 6 (2.0%) | 12 (3.1%) | |
## |**previous_ate** | | | | 0.174|
## | No | 82 (93.2%) | 266 (88.1%) | 348 (89.2%) | |
## | Yes | 6 (6.8%) | 36 (11.9%) | 42 (10.8%) | |
## |**family_background_vte** | | | | 0.090|
## | No | 81 (92.0%) | 291 (96.4%) | 372 (95.4%) | |
## | Yes | 7 (8.0%) | 11 (3.6%) | 18 (4.6%) | |
## |**khorana_risk_score_three_categories** | | | | 0.157|
## | N-Miss | 0 | 1 | 1 | |
## | Low risk | 19 (21.6%) | 89 (29.6%) | 108 (27.8%) | |
## | Intermediate risk | 47 (53.4%) | 160 (53.2%) | 207 (53.2%) | |
## | High risk | 22 (25.0%) | 52 (17.3%) | 74 (19.0%) | |
## |**primary_tumor** | | | | < 0.001|
## | Colorectal cancer | 28 (31.8%) | 134 (44.4%) | 162 (41.5%) | |
## | Esophageal cancer | 2 (2.3%) | 12 (4.0%) | 14 (3.6%) | |
## | Gastric cancer | 10 (11.4%) | 45 (14.9%) | 55 (14.1%) | |
## | NSCLC | 15 (17.0%) | 72 (23.8%) | 87 (22.3%) | |
## | Pancreatic cancer | 33 (37.5%) | 39 (12.9%) | 72 (18.5%) | |
## |**tnm_stage_grouped** | | | | 0.005|
## | I-II | 11 (12.5%) | 60 (19.9%) | 71 (18.2%) | |
## | III | 23 (26.1%) | 116 (38.4%) | 139 (35.6%) | |
## | IV | 54 (61.4%) | 126 (41.7%) | 180 (46.2%) | |
## |**grade_histological_differentiation** | | | | 0.735|
## | N-Miss | 28 | 61 | 89 | |
## | Well differentiated | 10 (16.7%) | 37 (15.4%) | 47 (15.6%) | |
## | Moderately differentiated | 28 (46.7%) | 126 (52.3%) | 154 (51.2%) | |
## | Poorly differentiated | 22 (36.7%) | 78 (32.4%) | 100 (33.2%) | |
## |**tumor_surgically_removed** | | | | 0.026|
## | No | 70 (79.5%) | 203 (67.2%) | 273 (70.0%) | |
## | Yes | 18 (20.5%) | 99 (32.8%) | 117 (30.0%) | |
## |**catheter_device** | | | | 0.029|
## | N-Miss | 0 | 1 | 1 | |
## | No | 46 (52.3%) | 196 (65.1%) | 242 (62.2%) | |
## | Yes | 42 (47.7%) | 105 (34.9%) | 147 (37.8%) | |
## |**major_surgery_in_medical_history** | | | | 0.597|
## | No | 87 (98.9%) | 296 (98.0%) | 383 (98.2%) | |
## | Yes | 1 (1.1%) | 6 (2.0%) | 7 (1.8%) | |
## |**n_times_recent_major_surgery_all_study** | | | | 0.585|
## | 0 | 66 (75.0%) | 206 (68.2%) | 272 (69.7%) | |
## | 1 | 19 (21.6%) | 83 (27.5%) | 102 (26.2%) | |
## | 2 | 2 (2.3%) | 11 (3.6%) | 13 (3.3%) | |
## | 3 | 1 (1.1%) | 2 (0.7%) | 3 (0.8%) | |
## |**n_times_major_trauma_all_study** | | | | 0.589|
## | 0 | 88 (100.0%) | 301 (99.7%) | 389 (99.7%) | |
## | 1 | 0 (0.0%) | 1 (0.3%) | 1 (0.3%) | |
## |**n_times_blood_transfusion_all_study** | | | | 0.004|
## | 0 | 55 (62.5%) | 244 (80.8%) | 299 (76.7%) | |
## | 1 | 26 (29.5%) | 49 (16.2%) | 75 (19.2%) | |
## | 2 | 5 (5.7%) | 7 (2.3%) | 12 (3.1%) | |
## | 3 | 2 (2.3%) | 2 (0.7%) | 4 (1.0%) | |
## |**n_times_immobilisation_all_study** | | | | 0.580|
## | 0 | 66 (75.0%) | 239 (79.1%) | 305 (78.2%) | |
## | 1 | 19 (21.6%) | 48 (15.9%) | 67 (17.2%) | |
## | 2 | 3 (3.4%) | 14 (4.6%) | 17 (4.4%) | |
## | 3 | 0 (0.0%) | 1 (0.3%) | 1 (0.3%) | |
## |**n_times_erythropoietin_tx_all_study** | | | | 0.116|
## | 0 | 82 (93.2%) | 287 (95.0%) | 369 (94.6%) | |
## | 1 | 5 (5.7%) | 10 (3.3%) | 15 (3.8%) | |
## | 2 | 0 (0.0%) | 5 (1.7%) | 5 (1.3%) | |
## | 3 | 1 (1.1%) | 0 (0.0%) | 1 (0.3%) | |
# Median and quantiles of age at cancer diagnosis
print("Quantiles of age at cancer diagnosis for all patients:")
## [1] "Quantiles of age at cancer diagnosis for all patients:"
quantile(oncoth1_data$age_when_cancer_dx)
## 0% 25% 50% 75% 100%
## 33 57 64 72 93
cat("\n")
print("Quantiles of age at cancer diagnosis for patients with CAT:")
## [1] "Quantiles of age at cancer diagnosis for patients with CAT:"
oncoth1_data %>%
filter(patient_group == "Case") %>%
select(age_when_cancer_dx) %>%
quantile(., na.rm = TRUE)
## 0% 25% 50% 75% 100%
## 42.00 54.75 65.00 73.00 86.00
cat("\n")
print("Quantiles of age at cancer diagnosis for patients with CAT:")
## [1] "Quantiles of age at cancer diagnosis for patients with CAT:"
oncoth1_data %>%
filter(patient_group == "Control") %>%
select(age_when_cancer_dx) %>%
quantile(., na.rm = TRUE)
## 0% 25% 50% 75% 100%
## 33 58 64 71 93
cat("\n")
# Example usage with your specific data
columns_to_test <- c(
"age_when_cancer_dx",
"weight",
"height",
"body_surface_area",
"bmi_value",
"albumin",
"aptt_ratio",
"bilirubin",
"creatinine",
"alkaline_phosphatase",
"hemoglobin",
"inr",
"leukocytes",
"platelets"
)
# Run Shapiro-Wilks normality test
shapiro_results <- shapiro_test_summary(oncoth1_data, columns_to_test)
# Print the results
for (col in names(shapiro_results)) {
print("Shapiro-Wilk normality test")
cat(col, ":", shapiro_results[[col]]$normality, "(p-value =", shapiro_results[[col]]$p_value, ")\n")
}
## [1] "Shapiro-Wilk normality test"
## age_when_cancer_dx : Normally distributed (p-value = 0.1009268 )
## [1] "Shapiro-Wilk normality test"
## weight : Not normally distributed (p-value = 0.000001926424 )
## [1] "Shapiro-Wilk normality test"
## height : Not normally distributed (p-value = 0.00415426 )
## [1] "Shapiro-Wilk normality test"
## body_surface_area : Not normally distributed (p-value = 0.00002113358 )
## [1] "Shapiro-Wilk normality test"
## bmi_value : Not normally distributed (p-value = 0.00000000000359157 )
## [1] "Shapiro-Wilk normality test"
## albumin : Not normally distributed (p-value = 0.000009828937 )
## [1] "Shapiro-Wilk normality test"
## aptt_ratio : Not normally distributed (p-value = 0.000000000144287 )
## [1] "Shapiro-Wilk normality test"
## bilirubin : Not normally distributed (p-value = 0.00000000000000000000000000000000294055 )
## [1] "Shapiro-Wilk normality test"
## creatinine : Not normally distributed (p-value = 0.00000000007557108 )
## [1] "Shapiro-Wilk normality test"
## alkaline_phosphatase : Not normally distributed (p-value = 0.0000000000000000000000000000001510757 )
## [1] "Shapiro-Wilk normality test"
## hemoglobin : Normally distributed (p-value = 0.06060825 )
## [1] "Shapiro-Wilk normality test"
## inr : Not normally distributed (p-value = 0.0000000003900261 )
## [1] "Shapiro-Wilk normality test"
## leukocytes : Not normally distributed (p-value = 0.0000000000000000000007705813 )
## [1] "Shapiro-Wilk normality test"
## platelets : Not normally distributed (p-value = 0.000000000000003072663 )
# Run Shapiro-Wilks normality test
lillie_results <- lillie_test_summary(oncoth1_data, columns_to_test)
# Print the results
for (col in names(lillie_results)) {
print("Lilliefors (Kolmogorov-Smirnov) normality test")
cat(col, ":", lillie_results[[col]]$normality, "(p-value =", lillie_results[[col]]$p_value, ")\n")
}
## [1] "Lilliefors (Kolmogorov-Smirnov) normality test"
## age_when_cancer_dx : Normally distributed (p-value = 0.1833302 )
## [1] "Lilliefors (Kolmogorov-Smirnov) normality test"
## weight : Not normally distributed (p-value = 0.0004022078 )
## [1] "Lilliefors (Kolmogorov-Smirnov) normality test"
## height : Not normally distributed (p-value = 0.0001375539 )
## [1] "Lilliefors (Kolmogorov-Smirnov) normality test"
## body_surface_area : Not normally distributed (p-value = 0.00000000003434639 )
## [1] "Lilliefors (Kolmogorov-Smirnov) normality test"
## bmi_value : Not normally distributed (p-value = 0.000000205658 )
## [1] "Lilliefors (Kolmogorov-Smirnov) normality test"
## albumin : Not normally distributed (p-value = 0.00000000001496203 )
## [1] "Lilliefors (Kolmogorov-Smirnov) normality test"
## aptt_ratio : Not normally distributed (p-value = 0.008137728 )
## [1] "Lilliefors (Kolmogorov-Smirnov) normality test"
## bilirubin : Not normally distributed (p-value = 0.0000000000000000000000000000000000000000000000000000000000000000000000000000008538166 )
## [1] "Lilliefors (Kolmogorov-Smirnov) normality test"
## creatinine : Not normally distributed (p-value = 0.000001242227 )
## [1] "Lilliefors (Kolmogorov-Smirnov) normality test"
## alkaline_phosphatase : Not normally distributed (p-value = 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002219109 )
## [1] "Lilliefors (Kolmogorov-Smirnov) normality test"
## hemoglobin : Not normally distributed (p-value = 0.002121832 )
## [1] "Lilliefors (Kolmogorov-Smirnov) normality test"
## inr : Not normally distributed (p-value = 0.0000000000003302228 )
## [1] "Lilliefors (Kolmogorov-Smirnov) normality test"
## leukocytes : Not normally distributed (p-value = 0.00000000000000000260768 )
## [1] "Lilliefors (Kolmogorov-Smirnov) normality test"
## platelets : Not normally distributed (p-value = 0.0000000000000002683373 )
print(paste("There are", n_var_miss(oncoth1_data), "variables with missing data."))
## [1] "There are 343 variables with missing data."
# Visual inspection of proportion of missing values
# Leave out dates and other columns with a lot of NAs for patients w/o VTE
oncoth1_data %>%
select(!c(n_appointment_patient_became_case,
patient_left_study,
anticoag_tx_lmwh_dosage,
anticoag_tx_vte_drugs,
vte_before_entering_study)) %>%
select(!contains("reason")) %>%
select(!contains("date")) %>%
select(!ends_with("_levels")) %>%
select(!starts_with("other_thromb_")) %>%
select(!starts_with("type_")) %>%
select(!starts_with("cancer_concomitant_")) %>%
select(!starts_with("eval_")) %>%
select(!starts_with("rec_")) %>%
select(!starts_with("tu_")) %>%
select(!ends_with("_prior_study")) %>%
select(!ends_with("_along_study")) %>%
select(!ends_with("_recurrence")) %>%
select_if( ~ any(is.na(.))) %>%
gg_miss_var(., show_pct = TRUE)
# Visual inspection of proportion of missing values
# Leave out dates and other columns with a lot of NAs for patients w/o VTE
gg_miss_fct(
oncoth1_data %>%
select(!contains("reason")) %>%
select(!contains("date")) %>%
select(!ends_with("_levels")) %>%
select(!starts_with("other_thromb_")) %>%
select(!starts_with("type_")) %>%
select(!starts_with("cancer_concomitant_")) %>%
select(!starts_with("eval_")) %>%
select(!starts_with("rec_")) %>%
select(!starts_with("tu_")) %>%
select(!ends_with("_prior_study")) %>%
select(!ends_with("_along_study")) %>%
select(!ends_with("_recurrence")),
fct = patient_group)
# Proportion of patient with VTE vs no VTE
table(oncoth1_data$patient_group, useNA = "ifany")
##
## Case Control
## 88 302
https://aosmith.rbind.io/2019/05/13/small-multiples-plot/
hist_plot = ggplot(oncoth1_data, aes(x = age_when_cancer_dx)) +
geom_histogram(aes(y = (..count..)/sum(..count..)),
breaks = seq(from = 25, to = 100, by = 5)) +
xlab("") +
ylab("Proportion") +
scale_x_continuous(breaks = seq(from = 20, to = 100, by = 10))
boxplot = ggplot(oncoth1_data, aes(x = age_when_cancer_dx)) +
geom_boxplot() +
theme(axis.text.y = element_blank(),
axis.ticks = element_blank(),
plot.margin = unit(c(0.0001, 0.25, 1, 1), "cm")) +
scale_x_continuous(breaks = seq(from = 20, to = 100, by = 10))
ggpubr::ggarrange(hist_plot, boxplot,
ncol = 1, nrow = 2,
#heights = c(1, 0.5),
align = "v")
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Age and VTE
ggbetweenstats(
data = oncoth1_data,
x = patient_group,
y = age_when_cancer_dx,
xlab = "Patient group",
ylab = "Age at cancer diagnosis",
type = "barplot",
messages = FALSE # Set to TRUE to display additional messages
)
ggpiestats(
data = oncoth1_data,
x = gender,
y = patient_group
)
ggpiestats(
data = oncoth1_data,
x = ethnicity,
y = patient_group,
type = "nonparametric"
)
ggpiestats(
data = oncoth1_data,
x = menopausal_status,
y = patient_group,
type = "nonparametric"
)
The 64.95% of the data in this variable (NAs) corresponds to men.
ggpiestats(
data = oncoth1_data,
x = bmi_category,
y = patient_group,
type = "nonparametric"
)
ggpiestats(
data = oncoth1_data,
x = performance_status_category,
y = patient_group,
type = "nonparametric"
)
# Personal background by VTE
ggpiestats(
data = oncoth1_data,
x = personal_background,
y = patient_group,
type = "nonparametric"
)
# Major surgery in medical history by VTE
ggpiestats(
data = oncoth1_data,
x = major_surgery_in_medical_history,
y = patient_group,
type = "nonparametric"
)
ggpiestats(
data = oncoth1_data,
x = hormone_therapy,
y = patient_group,
type = "nonparametric"
)
ggpiestats(
data = oncoth1_data,
x = pregnancy,
y = patient_group,
type = "nonparametric"
)
ggpiestats(
data = oncoth1_data,
x = oral_contraceptive_tx,
y = patient_group,
type = "nonparametric"
)
ggpiestats(
data = oncoth1_data,
x = diabetes_mellitus,
y = patient_group,
type = "nonparametric"
)
fancy_barplot_percentage(oncoth1_data, dyslipidemia)
fancy_barplot_percentage(oncoth1_data, tobacco_use)
fancy_barplot_percentage(oncoth1_data, copd)
fancy_barplot_percentage(oncoth1_data, arterial_hypertension)
fancy_barplot_percentage(oncoth1_data, chronic_cardiac_insufficiency)
fancy_barplot_percentage(oncoth1_data, renal_insufficiency)
fancy_barplot_percentage(oncoth1_data, previous_onco_surgery) +
xlab("Previous Oncologic Surgery")
fancy_barplot_percentage(oncoth1_data, primary_tumor) +
theme(axis.text.x = element_text(size = 9))
fancy_barplot_percentage(oncoth1_data, tumor_surgically_removed)
fancy_barplot_percentage(oncoth1_data, progression_according_to_clinical_stage) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = c(0.5, 0.8),
legend.title = element_blank(),
legend.background = element_blank(),
legend.text=element_text(size = 11))
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
fancy_barplot_percentage(oncoth1_data, tnm_stage)
fancy_barplot_percentage(oncoth1_data, t_stage)
fancy_barplot_percentage(oncoth1_data, n_stage)
fancy_barplot_percentage(oncoth1_data, m_stage)
fancy_barplot_percentage(oncoth1_data, histology_type)
fancy_barplot_percentage(oncoth1_data, mucinous_histology)
fancy_barplot_percentage(oncoth1_data, grade_histological_differentiation)
fancy_barplot_percentage(oncoth1_data, catheter_device)
fancy_barplot_percentage(oncoth1_data, venous_insufficiency)
fancy_barplot_percentage(oncoth1_data, family_background_vte_all_relatives) +
xlab("Family background - VTE") +
scale_x_discrete(limits = c("No relatives",
"Father",
"Mother",
"Siblings",
"Others",
"Offspring",
"Two relatives"))
fancy_barplot_percentage(oncoth1_data, previous_vte)
fancy_barplot_percentage(oncoth1_data, previous_ate_type) +
xlab("Type of Previous ATE") +
scale_x_discrete(limits = c("No ATE",
"Myocardial infarction",
"CVA",
"PAD",
"TIA",
"Other"))
## Warning: Removed 4 rows containing non-finite outside the scale range (`stat_count()`).
## Removed 4 rows containing non-finite outside the scale range (`stat_count()`).
fancy_barplot_percentage(oncoth1_data, anticoag_tx_vte) +
xlab("Anticoagulant treatment due to previous VTE")
fancy_barplot_percentage(oncoth1_data, anticoag_tx_reason) +
xlab("Reason of prescription of anticoagulants") +
scale_x_discrete(limits = c("VTE",
"Cardiopathy",
"Others",
NA))
fancy_barplot_percentage(oncoth1_data, anticoag_tx_vte_drugs) +
xlab("Drugs used in anticoagulant treatments") +
scale_x_discrete(limits = c("LMWH",
"Vit K antagonists",
"Dabigatran",
NA))
fancy_barplot_percentage(oncoth1_data, anticoag_tx_currently)
fancy_barplot_percentage(oncoth1_data, cancer_diagnosed_due_to_thrombosis)
fancy_barplot_percentage(oncoth1_data, vte_dx_during_cancer_follow_up)
stacked_density_plot_and_boxplot(oncoth1_data, height, "Height")
## Warning: `stat(scaled)` was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(scaled)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Use Phi coefficient to see correlation between two categorical
variables, both of them adopting binary values.
psych::phi()
In case the categorical variables have more than 2 unique values, use
Crammer’s V. questionr::cramer.v()
#categorical_variables = c(variable.names(Filter(f = is.character, x = oncoth1_data)),
#variable.names(Filter(f = is.factor, x = oncoth1_data)))
categorical_variables = c(
"gender",
"ethnicity",
"menopausal_status",
"bmi_category",
"performance_status_category",
"leukocytosis",
"thrombocytosis",
"low_hemoglobin",
"personal_background",
# "other_personal_background",
"major_surgery_in_medical_history",
"hormone_therapy",
"pregnancy",
"oral_contraceptive_tx",
"diabetes_mellitus",
"dyslipidemia",
"tobacco_use",
"copd",
"arterial_hypertension",
"chronic_cardiac_insufficiency",
"renal_insufficiency",
"previous_onco_surgery",
"primary_tumor_simplified",
"tumor_surgically_removed",
"progression_according_to_clinical_stage",
"tnm_stage",
"tnm_stage_detailed",
"tnm_stage_grouped",
"t_stage",
"n_stage",
"m_stage",
"histology_type",
"mucinous_histology",
"grade_histological_differentiation",
"metastasis_dx",
"metastasis_adrenal_glands",
"metastasis_bones",
"metastasis_brain",
"metastasis_liver",
"metastasis_lung",
"metastasis_lymph_nodes",
"metastasis_peritoneum",
"metastasis_pleura",
"metastasis_skin",
"metastasis_soft_tissues",
"other_metastases",
"catheter_device",
"krs_category",
"two_groups_krs",
"tic_onco",
"venous_insufficiency",
"family_background_VTE_father",
"family_background_VTE_mother",
"family_background_VTE_siblings",
"family_background_VTE_offspring",
"family_background_VTE_other_relatives",
"previous_vte",
"previous_ate",
"cva_ate",
"myocardial_infarction_ate",
"pad_ate",
"tia_ate",
"other_thromb_ate",
"anticoag_tx_vte",
"anticoag_tx_vte_drugs",
"anticoag_tx_cardiopathy",
"anticoag_tx_other_causes",
"anticoag_tx_reason",
"anticoag_tx_apixaban",
"anticoag_tx_dabigatran",
"anticoag_tx_lmwh",
"anticoag_tx_lmwh_dosage",
"anticoag_tx_vit_k_antag",
"anticoag_tx_other_drugs",
"anticoag_tx_currently",
"antiaggreg_tx_cardiopathy",
"antiaggreg_tx_cva",
"antiaggreg_tx_other_causes",
"antiaggreg_tx_currently",
"vte_before_entering_study",
"cancer_associated_crt_vte",
"cancer_associated_ledvt_vte",
"cancer_associated_uedvt_vte",
"cancer_associated_pte_vte",
"cancer_associated_svt_vte",
"cancer_associated_visceral_thromb_vte",
"cancer_associated_other_thromb_vte"
)
# Empty list to save ggbarstats plots
ggbarstats_plots = list()
for (variable in categorical_variables[1:79]) {
plot_i = ggbarstats(data = oncoth1_data,
x = patient_group,
y = !!variable,
type = "nonparametric") +
labs(caption = NULL)
# Print plot
print(plot_i)
# Save plot in list
ggbarstats_plots[[variable]] = plot_i
}
# Get comorbidities features
comordibities = oncoth1_data %>%
select(
diabetes_mellitus,
dyslipidemia,
copd,
arterial_hypertension,
chronic_cardiac_insufficiency,
renal_insufficiency,
venous_insufficiency
) %>%
mutate_all( ~ ifelse(.x == "Yes", 1, 0)) %>%
drop_na() %>%
as.matrix()
# Correlation plot for comorbidities
psych::corPlot(
comordibities,
show.legend = TRUE,
symmetric = TRUE
)